2.0: Loading required packages:¶

In [62]:
# Administrative stuff:
import os

# For Data manipulation:
import pandas as pd
import numpy as np
import random
import re

# Nice to haves
#import geopandas as gpd
import matplotlib.pyplot as plt
%matplotlib inline
from shapely.geometry import Point, Polygon
# import googlemaps
from plotly import graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
import requests
import json
import pandas as pd

## dynamic text updating
#from IPython.display import Markdown as md

## repeated printouts
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

from dash import Dash, dcc, html, Input, Output
import plotly.express as px
import seaborn as sns

2.1: Getting the Cleaned and Merged Data¶

In [63]:
%store -r air_data
%store -r air_ken
%store -r air_ken_2019
%store -r health_county_names
%store -r health_data
%store -r health_ken
%store -r health_ken_2019
%store -r health_ken_2019_clean
%store -r health_ken_2019_fm
%store -r so2
%store -r so2_county_names
%store -r so2_ken
%store -r airq_0
%store -r so2_ken_f
%store -r good_days_ken
%store -r daily_aqi
%store -r daily_aqi_ken

%store -r so2_reshape
%store -r merged_ahs
%store -r merged_ah
no stored variable or alias airq_0
In [64]:
pd.set_option('display.max_columns',500)
pd.set_option('display.width',500)
In [65]:
merged_ahs.head(2)
merged_ahs.shape
Out[65]:
index_air State County Year_air Days with AQI Good Days Moderate Days Unhealthy for Sensitive Groups Days Unhealthy Days Very Unhealthy Days Hazardous Days Max AQI 90th Percentile AQI Median AQI Days CO Days NO2 Days Ozone Days PM2.5 Days PM10 % of good days index_health Year_health CountyName CountyFIPS Category Measure Data_Value_Unit Data_Value_Type Data_Value TotalPopulation Geolocation LocationID MeasureId County Name Site Num so2_lat so2_long so2_city_name so2_n_dates so2_good_days so2_moderate_days so2_unhealthy_days so2_unhealthy_sens_days so2_avg_aqi so2_max_aqi so2_min_AQI health_long health_lat dist precise
0 328 Kentucky Boyd 2019 365 313 52 0 0 0 0 74 53 39 0 11 192 160 2 85.753425 764356 2019 Boyd 21019 Health Status Physical health not good for >=14 days among a... % Crude prevalence 15.4 4,484 POINT (-82.63164476 38.44922764) 21019030500 PHLTH Boyd 17 38.45934 -82.64041 Ashland 365 365 0 0 0 0.479452 9.0 0.0 -82.631645 38.449228 0.843866 True
1 328 Kentucky Boyd 2019 365 313 52 0 0 0 0 74 53 39 0 11 192 160 2 85.753425 793478 2019 Boyd 21019 Health Risk Behaviors No leisure-time physical activity among adults... % Crude prevalence 33.4 7,919 POINT (-82.71349743 38.32089352) 21019031100 LPA Boyd 17 38.45934 -82.64041 Ashland 365 365 0 0 0 0.479452 9.0 0.0 -82.713497 38.320894 10.345064 True
Out[65]:
(9276, 50)
In [66]:
daily_aqi_ken.head(2)
Out[66]:
State Name county Name State Code County Code Date AQI Category Defining Parameter Defining Site Number of Sites Reporting Month
105732 Kentucky Bell 21 13 2019-01-03 26 Good PM2.5 21-013-0002 1 1
105733 Kentucky Bell 21 13 2019-01-09 8 Good PM2.5 21-013-0002 1 1
In [67]:
groupby_d=pd.DataFrame(daily_aqi_ken.groupby(["Month","county Name"])["AQI"].mean()).reset_index()
In [68]:
groupby_d[['Month','county Name']].nunique()
Out[68]:
Month          12
county Name    27
dtype: int64

2.2: Monthly Average AQI for Kentucky¶

In [69]:
# Create a Python Numpy array (27*12 Matrix)
new_df = groupby_d.pivot(index='county Name', columns='Month')['AQI'].dropna(how='all')
new_df.head(2)
Out[69]:
Month 1 2 3 4 5 6 7 8 9 10 11 12
county Name
Bell 27.4 27.8 42.774194 47.333333 39.833333 37.333333 33.900000 37.129032 41.633333 32.645161 41.0 26.2
Boone NaN NaN 38.838710 40.866667 36.103448 38.423077 41.580645 43.709677 45.033333 31.774194 NaN NaN
In [70]:
df3=groupby_d.dropna(subset=["AQI"],how='all')
df3=df3.sort_values(["county Name"])
df3.shape
Out[70]:
(286, 3)
In [71]:
df4=df3.fillna("")
In [72]:
x=df4["Month"]

y=df4["county Name"]

z=df4["AQI"]


trace = go.Heatmap(
   x=df4["Month"],
   y=df4["county Name"],
    z=df4["AQI"],
    type = 'heatmap',
    hovertemplate='Month: %{x}<br>County: %{y}<br>Average_AQI: %{z}<extra></extra>',
    reversescale=True)

fig1 = go.Figure(
    data = trace,
    layout=go.Layout(
        title="<b>Heat Map of Monthly Average AQI for Kentucky</b>",title_x=0.5,
        yaxis={"title": '<b>County Name</b>'},
        xaxis={"title": '<b>Months</b>'},
        font=dict(
        family="Arial, Monospace",
        size=14,),
        width=1200,
        height=1400,legend={"title":"Average AQI"},paper_bgcolor="rgb(0,0,0,0)"
    ),
)
fig1.update_layout(width=700,height=700,showlegend=True)
#fig1.show()
In [73]:
fig1.write_html('2.2_Heat_Map_of__Monthly_Average_AQI_for_Kentucky_by_County.html')

2.3: Categorical Average AQI for Kentucky, by County¶

In [74]:
health_gr = pd.DataFrame(health_ken_2019_fm.groupby(["CountyName","Category","MeasureId"])
                         ["Data_Value"].mean()).reset_index()
In [75]:
x=health_gr["MeasureId"]

y=health_gr["CountyName"]

z=health_gr["Data_Value"]


trace = go.Heatmap(
   x=health_gr["Category"],
   y=health_gr["CountyName"],
    z=z,
    type = 'heatmap',
    colorscale = 'Viridis',
    hovertemplate='Month: %{x}<br>County: %{y}<br>Average_AQI: %{z}<extra></extra>',
    reversescale=True)

fig1 = go.Figure(
    data = trace,
    layout=go.Layout(
        title="<b>Heat Map of Categorical Average AQI for Kentucky, by County</b>",title_x=0.5,
        yaxis={"title": '<b>County Name</b>'},
        xaxis={"title": '<b>Months</b>'},
        font=dict(
        family="Arial, Monospace",
        size=14,),
        width=1200,
        height=1400,legend={"title":"Average AQI"},paper_bgcolor="rgb(0,0,0,0)"
    ),
)
fig1.update_layout(width=900,height=900,showlegend=True)
#fig1.show()
In [76]:
merged_ah["bad_days"]=merged_ah["Days with AQI"]-merged_ah["Good Days"]
In [77]:
merged_ah["%age of bad days"] = (merged_ah["bad_days"]/merged_ah["Days with AQI"])*100
In [78]:
fig5 = px.scatter(merged_ah, x="%age of bad days", y="Data_Value",color='MeasureId',facet_col="Category",facet_col_wrap=1)
#fig5.for_each_annotation(lambda a: a.update(text=a.text.split("=")[-1]))
fig5.update_layout(width=1000,height=1000,showlegend=True)
#fig5.show()
fig5.write_html('2.3_Heat_Map_of_Categorical_Average_AQI_for_Kentucky_by_County.html')

2.4: Comparing data collectio points to sulfur and health¶

In [79]:
# Importing required packages:
import folium
from folium import plugins

# Mapping SO2
data = np.array([merged_ahs["health_lat"],merged_ahs["health_long"]]).T
data_so2 = np.array([so2_reshape["so2_lat"],so2_reshape["so2_long"]]).T


popups_so2 = ['<strong>' + str(round(i,2)) + "</strong>" for i in so2_reshape["so2_avg_aqi"]]
popups = ['<strong>' + str(i) + "</strong>" for i in merged_ahs["MeasureId"]]


marker_so2 = plugins.MarkerCluster(locations = data_so2 , popups=popups_so2)
marker = plugins.MarkerCluster(locations = data, popups=popups)

plotmap1 = folium.Map(location=[37.8393,84.2700],
                     zoom_start=10,
                     control_scale=True,
#                       icon=folium.Icon(icon="cloud",color='red'),
                     titles="Sulfer and Health")


# Plotting SO2 data of site in a 15 mile radius -> crimson 
for i in range(0,len(so2_reshape)):
    folium.Circle(
        location=[so2_reshape.iloc[i]['so2_lat'], so2_reshape.iloc[i]['so2_long']],
        popup=round(so2_reshape.iloc[i]['so2_avg_aqi'],2),
        radius=24140,
        fill=True,
        fill_color='crimson'
    ).add_to(plotmap1)


plotmap1.add_child(marker)
Out[79]:
<folium.vector_layers.Circle at 0x18095d5f8e0>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd1760>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd1eb0>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd1e20>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd1d00>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd1b20>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd19d0>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd1880>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd14f0>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd1430>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd12e0>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd1130>
Out[79]:
<folium.vector_layers.Circle at 0x18095fd10a0>
Out[79]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [80]:
plotmap1.save("2.4_health_so2.html")